####################################################################################################
### Title: People Consistently View Elections and Civil Liberties as Key Components of Democracy ###
### Content: Cleaning the US dataset                                                             ###
### Date: August 24, 2024                                                                        ###
####################################################################################################

### Set-up ----
## Clean the working environment and set the working directory
# NOTE(review): rm(list = ls()) deletes the objects that ls() lists in the
# environment the script is run from, and setwd() points at a path under the
# user's home directory. The relative file names used later in this script
# ("raw_US.csv" for input, "df_US.csv" for output) are resolved against this
# directory, so adjust the path to wherever the replication files live.
rm(list = ls())
setwd("~/Desktop/Science_Replication/data_cleaning")

## Install the cjdata package (if not yet installed)
# cjdata is installed from GitHub rather than CRAN; uncomment the two lines
# below to install it once.
# library(devtools)  # version 2.4.3
# install_github(repo = "yhoriuchi/cjdata")

## Load the required packages
# The version comments record the versions noted by the original authors.
library(tidyverse) # version 2.0.0
library(cjdata)    # version 0.1.0

## Read the raw dataset
# read_Qualtrics() is not defined in this file; presumably it is provided by
# cjdata -- confirm. The result is stored in `df` and used by every recoding
# step below.
df <- read_Qualtrics("raw_US.csv")

### Data cleaning ----
## Recode variables of respondent characteristics
# Minority status: two-level factor with "Non-Minority" as the first level and
# "Minority" as the second. "Yes" is coded "Minority"; "No" and "Not sure" are
# coded "Non-Minority"; every other value (including missing) becomes NA.
df <- df %>%
  mutate(
    minority_bin = factor(
      case_when(
        minority == "Yes" ~ "Minority",
        minority %in% c("No", "Not sure") ~ "Non-Minority"
      ),
      levels = c("Non-Minority", "Minority")
    )
  )
# Frequency check of the recoded variable
table(df[["minority_bin"]])

# Gender: the raw answer is kept under the name `gender5`, which frees the
# name `gender` (reused further down for a conjoint attribute column).
# `gender_bin` is a two-level factor with "Male" as the first level and
# "Female" as the second; any other answer becomes NA.
df <- df %>%
  rename(gender5 = gender) %>%
  mutate(gender_bin = factor(gender5, levels = c("Male", "Female")))
# Frequency check of the recoded variable
table(df[["gender_bin"]])

# Self-reported political ideology (1 = right; 0 = left), stored as a
# two-level factor with "Left" as the first level and "Right" as the second.
# Answers that are none of the six left/right labels below stay NA.
#
# The answer text is whitespace-normalised with str_squish() before matching.
# The previous comparison used the literal "Somewhat      Right" (a run of six
# spaces), which only matches that exact spacing: a value exported as
# "Somewhat Right" would have been silently coded NA. After squishing, both
# spellings are recognised, so the recode no longer depends on how the export
# happened to space the label.
df <- df %>%
  mutate(
    ideo_bin = case_when(
      str_squish(as.character(political)) %in%
        c("Strongly Left", "Moderately Left", "Somewhat Left") ~ 0,
      str_squish(as.character(political)) %in%
        c("Strongly Right", "Moderately Right", "Somewhat Right") ~ 1
    )
  )
df$ideo_bin <- factor(df$ideo_bin, 0:1, c("Left", "Right"))
# Frequency check of the recoded variable
table(df$ideo_bin)

# Education: two-level factor with "No College" as the first level and
# "College" as the second. A bachelor's or advanced degree counts as
# "College"; an associate's degree or less counts as "No College"; any other
# value (including missing) becomes NA.
df <- df %>%
  mutate(
    edu_bin = factor(
      case_when(
        edu %in% c(
          "Less than high school",
          "High school or equivalent",
          "Some college, no degree",
          "Associate's Degree (AA)"
        ) ~ "No College",
        edu %in% c(
          "Bachelor's Degree (BA, BS, BBA)",
          "Advanced degree (MA, MS, MBA, PhD, JD, MD, etc.)"
        ) ~ "College"
      ),
      levels = c("No College", "College")
    )
  )
# Frequency check of the recoded variable
table(df[["edu_bin"]])

# Geopolitical alignment: two-level factor with "Pro-US" as the first level
# and "Pro-China" as the second. "Probably"/"Definitely" answers are pooled
# within each side; any other value (including missing) becomes NA.
df <- df %>%
  mutate(
    pro_china = factor(
      case_when(
        china_us1 %in% c(
          "Probably prefer the United States",
          "Definitely prefer the United States"
        ) ~ "Pro-US",
        china_us1 %in% c(
          "Probably prefer China",
          "Definitely prefer China"
        ) ~ "Pro-China"
      ),
      levels = c("Pro-US", "Pro-China")
    )
  )
# Frequency check of the recoded variable
table(df[["pro_china"]])

## Subset and reshape the dataset so that each row is a distinct conjoint task
# Calls cjdata::reshape_conjoint() with the respondent identifier column
# (ResponseId) and the column names "Q1.1", "Q1.5" and "Q1.9" (built by
# paste0() from the sequence 1, 5, 9).
# NOTE(review): these three columns are presumably the choice outcomes of the
# three conjoint tasks -- confirm against the questionnaire and the cjdata
# documentation.
df_cj <- cjdata::reshape_conjoint(df, ResponseId, paste0("Q1.", seq(from = 1, to = 9, by = 4)))

## Give the conjoint attribute columns short names
# Each entry reads new_name = "existing column name". The existing names are
# the attribute texts and must match exactly; note that the `leader` text ends
# in three ASCII periods, whereas the others end in the single ellipsis
# character.
df_cj <- rename(
  df_cj,
  all_of(c(
    econ     = "The economic situations of the rich and the poor are…",
    populist = "When making policies, politicians…",
    direct   = "The people vote directly on policy decisions…",
    civil    = "Citizens’ individual liberties, such as freedom of speech, religion, and assembly are…",
    gender   = "The rights of men and women are…",
    expert   = "Independent, non-elected experts have…",
    obedient = "When citizens dislike their government’s policies…",
    election = "Elections for political leadership in this country are…",
    leader   = "When making decisions, the country’s leader..."
  ))
)

## Convert each conjoint attribute to a factor with ordered, relabelled levels
# For every attribute, `levels` lists the raw answer texts in the order the
# factor levels should take, and `labels` supplies the short label that
# replaces each one (same position). Raw values not listed in `levels`
# become NA.
df_cj <- df_cj %>%
  mutate(
    # Electoral democracy
    election = factor(
      election,
      levels = c(
        "Not held or Non-existent",
        "Biased in favor of the political group that currently holds power",
        "Freely and fairly contested by various political groups"
      ),
      labels = c(
        "Elections are not held",
        "Elections are biased",
        "Elections are free and fair"
      )
    ),
    # Liberal democracy
    civil = factor(
      civil,
      levels = c(
        "Not at all protected by the law",
        "Weakly protected by the law",
        "Strongly protected by the law"
      ),
      labels = c(
        "Civil liberties are not at all protected",
        "Civil liberties are weakly protected",
        "Civil liberties are strongly protected"
      )
    ),
    # Institutional democracy
    leader = factor(
      leader,
      levels = c(
        "Can almost always bypass the legislature and courts' authority",
        "Can sometimes bypass the legislature and courts' authority",
        "Must respect the legislature and courts' authority"
      ),
      labels = c(
        "Leader is weakly constrained",
        "Leader is somewhat constrained",
        "Leader is highly constrained"
      )
    ),
    # Populist democracy
    populist = factor(
      populist,
      levels = c(
        "Rarely follow what the majority wants",
        "Sometimes follow what the majority wants",
        "Frequently follow what the majority wants"
      ),
      labels = c(
        "Leader rarely follows the majority",
        "Leader sometimes follows the majority",
        "Leader frequently follows the majority"
      )
    ),
    # Loyalist democracy
    obedient = factor(
      obedient,
      levels = c(
        "Most obey the government",
        "Some obey and some challenge the government",
        "Most challenge the government"
      ),
      labels = c(
        "Dissidents mostly obey the gov't",
        "Dissidents occasionally obey the gov't",
        "Dissidents mostly challenge the gov't"
      )
    ),
    # Substantive democracy (economy)
    econ = factor(
      econ,
      levels = c("Highly unequal", "Somewhat unequal", "Generally equal"),
      labels = c(
        "Economic equality is very low",
        "Economic equality is somewhat low",
        "Economic equality is high"
      )
    ),
    # Substantive democracy (gender)
    gender = factor(
      gender,
      levels = c("Highly unequal", "Somewhat unequal", "Equal"),
      labels = c(
        "Gender equality is very low",
        "Gender equality is somewhat low",
        "Gender equality is high"
      )
    ),
    # Technocratic democracy
    expert = factor(
      expert,
      levels = c(
        "Not much influence on policy",
        "Some influence on policy",
        "A great deal of influence on policy"
      ),
      labels = c(
        "Experts have small influence on policy",
        "Experts have some influence on policy",
        "Experts have much influence on policy"
      )
    ),
    # Direct democracy
    direct = factor(
      direct,
      levels = c("Rarely", "Sometimes", "Frequently"),
      labels = c(
        "Policies are rarely voted on",
        "Policies are sometimes voted on",
        "Policies are frequently voted on"
      )
    )
  )

## Attach the respondent-level covariates to the conjoint data
# The identifier column in df is renamed to `id` so that it can serve as the
# merge key. merge() is called with its default all = FALSE, so only ids
# present in both data frames are kept.
df <- rename(df, id = ResponseId)
df_cj <- merge(x = df_cj, y = df, by = "id")

## Tag every row with the country code
df_cj[["country"]] <- "US"

## Write the cleaned dataset to the working directory, without row names
write.csv(x = df_cj, file = "df_US.csv", row.names = FALSE)
